Challenge 3 - Bullet Point 3

Programming Elegant DataVis with tidyverse and ggplot2 R packages

Raveena Chakrapani https://www.linkedin.com/in/raveena-chakrapani-444a60174/ (School of Computing and Information Systems, Singapore Management University) https://scis.smu.edu.sg/master-it-business
2022-06-14
# Packages attached with library(); their functions are called unqualified
# throughout the script.
packages <- c("tidyverse", "ggplot2", "dplyr", "patchwork",
              "gganimate", "plotly", "treemap", "d3Tree", "ggstatsplot")

# Packages that are only reached through `pkg::fun()` (ggdist::stat_halfeye,
# knitr::kable). They have to be installed, but are deliberately not attached
# so nothing already on the search path gets masked.
namespace_only <- c("ggdist", "knitr")

# Install anything that is missing. requireNamespace() only probes for the
# package; it does not attach it and does not warn when the package is absent.
for (p in c(namespace_only, packages)) {
  if (!requireNamespace(p, quietly = TRUE)) {
    install.packages(p)
  }
}

# Attach in the declared order; library() stops with an error if a package
# is still unavailable after the install attempt.
for (p in packages) {
  library(p, character.only = TRUE)
}
# Load the raw input tables from the local data/ directory.
jobs <- read_csv("data/Jobs.csv")
emp <- read_csv("data/Employers.csv")
travel <- read_csv("data/TravelJournal.csv")
# The path must be a single-line string: readr treats any string that
# contains a newline as literal CSV text rather than as a file path.
apartments <- read_csv("data/wkt/Apartments.csv")

1. Financial Health of employers

# Number of job rows per employer, largest first.
hires <- jobs %>%
  count(employerId, sort = TRUE)
# employerpay <- jobs %>%
#    group_by(employerId) %>%
#    dplyr::summarise(emppay = weeklypay)
#            
# pay_hires <- merge(x = hires, y = employerpay, by = "employerId", all = TRUE) %>%
#   mutate(employeepay = emppay / n) %>%
#   arrange(desc(employeepay))
# Per-employer pay summary used as the treemap input:
#   jobNum               number of job rows for the employer
#   totalPay             sum of hourlyRate over those rows
#   Average Hourly Pay   mean of hourlyRate over those rows
#   group                label "<jobNum> Employees", used as the outer
#                        treemap level
jobsnum <- jobs %>%
  group_by(employerId) %>%
  summarise(
    jobNum = n(),
    totalPay = sum(hourlyRate),
    `Average Hourly Pay` = mean(hourlyRate)
  ) %>%
  mutate(group = paste(jobNum, "Employees"))

# Treemap of employers nested under their "<n> Employees" group.
#   tile area   <- totalPay
#   tile colour <- Average Hourly Pay (type = "value" maps the numeric
#                  column straight onto the colour scale)
tm <- treemap(
  jobsnum,
  index  = c("group", "employerId"),
  vSize  = "totalPay",
  vColor = "Average Hourly Pay",
  type   = "value",
  title  = "Employee Wage by Workplace"
)

# Earlier call variants, kept for reference:
#d3Tree(tm, rootname = 'Employee Hourly Wage by Workplace')
#d3tree(tm, rootname = 'Employee Hourly Wage by Workplace')
# NOTE(review): a d3tree() that takes a treemap plus `rootname` is, as far as
# I know, provided by the GitHub package d3treeR rather than CRAN's d3Tree,
# which is what the setup chunk attaches -- confirm which one is intended.
d3tree(tm)

2. Employment Patterns

# Derive weekly figures for every job and order the education levels.
#   workinghours          (endTime - startTime) in hours, times 5
#                         (presumably five working days per week -- confirm)
#   weeklypay             hourlyRate * workinghours, as a plain number
#   educationRequirement  factor with levels ordered from lowest to highest
# Columns are referenced directly inside mutate() instead of through `jobs$`,
# and `levels` is spelled out rather than relying on partial argument
# matching of `level`.
jobs <- jobs %>%
  mutate(
    workinghours = difftime(endTime, startTime, units = "hours") * 5,
    weeklypay = as.numeric(hourlyRate * workinghours),
    educationRequirement = factor(
      educationRequirement,
      levels = c("Low", "HighSchoolOrCollege", "Bachelors", "Graduate")
    )
  )
# Weekly pay summarised per education level: row count, mean, standard
# deviation, and the spread used for the error bars.
# NOTE(review): the usual standard error of the mean is sd / sqrt(n); the
# sqrt(n - 1) divisor is kept unchanged so the numbers agree with the
# rendered table below -- confirm which definition is intended.
weeklypay_education <- jobs %>%
  group_by(educationRequirement) %>%
  summarise(
    n    = n(),
    mean = mean(weeklypay),
    sd   = sd(weeklypay)
  ) %>%
  mutate(se = sd / sqrt(n - 1))

# Render the summary rows as an HTML table.
weeklypay_education %>%
  head() %>%
  knitr::kable(format = "html")
educationRequirement n mean sd se
Low 119 490.8497 160.4249 14.768306
HighSchoolOrCollege 705 586.9923 247.5990 9.331737
Bachelors 330 934.3181 521.0650 28.727241
Graduate 174 1355.1308 676.4057 51.426170
# Mean weekly pay per education level (red points) with mean +/- se
# error bars (black).
ggplot(weeklypay_education, aes(x = educationRequirement)) +
  geom_errorbar(
    aes(ymin = mean - se, ymax = mean + se),
    width = 0.2,
    colour = "black",
    alpha = 0.9,
    size = 0.5
  ) +
  geom_point(
    aes(y = mean),
    color = "red",
    size = 1.5,
    alpha = 1
  ) +
  ggtitle("Fg.1-2 Weekly pay vs educational requirement") +
  # Centre the title over the panel.
  theme(plot.title = element_text(hjust = 0.5))

# Hourly rate by education level, drawn as three stacked layers:
# a half-eye density, a narrow box plot without outlier markers, and the
# jittered raw observations.
p <- ggplot(
  jobs,
  aes(x = educationRequirement, y = hourlyRate, fill = educationRequirement)
) +
  # Density slab only: the interval is suppressed (.width = 0) and the
  # point estimate is hidden (point_colour = NA).
  ggdist::stat_halfeye(
    adjust = .5,
    width = .6,
    .width = 0,
    justification = -.3,
    point_colour = NA
  ) +
  geom_boxplot(width = .25, outlier.shape = NA) +
  # Fixed seed keeps the jitter reproducible between renders.
  geom_point(
    size = 1.3,
    alpha = .3,
    position = position_jitter(seed = 1, width = .1)
  ) +
  coord_cartesian(xlim = c(1.2, NA), clip = "off") +
  labs(
    title = "Wage Distribution for Different Education Level",
    subtitle = "High Wages For Higher Educated"
  ) +
  theme_minimal() +
  theme(
    plot.title = element_text(size = 14, face = "bold", hjust = 0.5),
    plot.subtitle = element_text(size = 12, hjust = 0.5,
                                 color = "mediumvioletred"),
    axis.title.y = element_text(angle = 0),
    axis.ticks.x = element_blank(),
    panel.background = element_blank(),
    axis.line = element_line(color = "grey")
  )

# Convert the static ggplot object into a plotly widget.
ggplotly(p)

3. Turnover Analysis

# One row per (participant, destination) pair seen on a
# "Work/Home Commute" trip; the per-pair trip count itself is discarded.
work_home <- travel %>%
  filter(purpose == "Work/Home Commute") %>%
  group_by(participantId, travelEndLocationId) %>%
  tally() %>%
  select(-n)

# Keep only destinations whose id matches an employerId in `emp`, then count
# how many such destinations each participant has.
work <- work_home %>%
  inner_join(emp, by = c("travelEndLocationId" = "employerId")) %>%
  select(participantId, travelEndLocationId) %>%
  group_by(participantId) %>%
  tally(name = "numberofplacesworked")

# Participants matched to more than one employer, most employers first.
workinmoreplaces <- work %>%
  filter(numberofplacesworked > 1) %>%
  arrange(desc(numberofplacesworked))

# Histogram of employers per participant, with ggstatsplot's one-sample
# test run against a reference value of 1 (test.value).
# The stray trailing comma after the last argument was removed: it passed an
# empty argument into `...`.
gghistostats(
  data = work,
  x = numberofplacesworked,
  xlab = "numbers of places worked",
  title = "Distribution of turnover rate",
  test.value = 1
)